#!/usr/local/bin/python
# coding=utf-8
import sys
# Python 2 only: ``reload`` is a builtin there, and reloading ``sys`` brings
# back ``setdefaultencoding`` (site.py deletes it at interpreter start-up).
reload(sys)
# Make UTF-8 the implicit str <-> unicode codec for the whole process.
# NOTE(review): presumably needed so that str() on non-ASCII cell text does
# not raise UnicodeEncodeError -- confirm before porting to Python 3.
sys.setdefaultencoding('utf8')

import log
import os
import re
import shutil

import xlrd

# An 11-digit number starting with "1", optionally preceded by the country
# code written as 86 or 0086 (the prefix is not part of the captured value).
# The lookarounds reject candidates embedded in a longer run of digits, so an
# 18-digit ID number no longer yields a bogus 11-digit "phone number".
_PHONE_PATTERN = re.compile(r"(?<!\d)(?:(?:00)?86)?(1\d{10})(?!\d)")


def find_phone_num(text):
    """Return every 11-digit phone number found in *text*.

    A phone number is "1" followed by ten more digits.  It must not be part
    of a longer digit sequence, except for a leading 86 / 0086 country code,
    which is accepted and stripped from the result.

    Args:
        text: the string to scan.

    Returns:
        A list of the matched 11-digit strings, in order of appearance
        (duplicates are kept).  Empty when nothing matches.
    """
    return _PHONE_PATTERN.findall(text)

def open_excel(file):
    """Open the workbook at *file* with ``xlrd.open_workbook``.

    Returns:
        The workbook object on success.  If opening raises any
        ``Exception``, the error text is printed and ``None`` is returned,
        so callers must check the result before using it.
    """
    try:
        return xlrd.open_workbook(file)
    except Exception as err:
        # Best effort: report the problem and signal failure with None.
        print(str(err))
        return None


def excel_table_byindex(file, colnameindex=0, by_index=0):
    """Read one worksheet into a list of ``{column_index: cell_value}`` dicts.

    Args:
        file: path of the workbook; it is opened through ``open_excel``.
        colnameindex: index of the row whose length decides how many columns
            are taken from every row.
        by_index: position of the worksheet inside ``data.sheets()``.

    Returns:
        One dict per non-empty row, from row 1 up to the last row (row 0 is
        never included), keyed by 0-based column index.  An empty list when
        the workbook could not be opened.
    """
    data = open_excel(file)
    if data is None:
        # open_excel has already printed the reason.  Return an empty result
        # (as executeExcel does) instead of failing on ``None.sheets()``.
        return []

    table = data.sheets()[by_index]
    nrows = table.nrows  # number of rows
    ncols = table.ncols  # number of columns
    width = len(table.row_values(colnameindex))
    # Single pre-formatted argument: prints the same text as the former
    # comma-separated print statement.
    print('行数： %s  列数: %s' % (nrows, ncols))

    records = []
    for rownum in range(1, nrows):
        row = table.row_values(rownum)
        if row:
            records.append(dict(enumerate(row[:width])))
    return records

def _quarantine_workbook(file):
    """Copy *file* into the '异常数据' directory under the current directory."""
    target_dir = os.path.join(os.path.abspath('.'), '异常数据')
    if not os.path.isdir(target_dir):
        # Allows executeExcel to be used without a prior call to execute().
        os.makedirs(target_dir)
    # basename() also copes with os.sep-separated paths, unlike split('/').
    shutil.copyfile(file, os.path.join(target_dir, os.path.basename(file)))


def _cell_text(value):
    """Render one cell value as text suitable for phone-number scanning."""
    # Whole-number floats are printed as plain integers: str() on a large
    # float can switch to exponent notation (Python 2 keeps only 12
    # significant digits), which hides or garbles the digits.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def executeExcel(file, colnameindex=0, by_index=0):
    """Collect the distinct phone numbers found in every sheet of a workbook.

    Every cell from row 1 onwards (row 0 is skipped) of every non-empty
    sheet is converted to text and scanned with ``find_phone_num``.

    A workbook that cannot be opened, or whose only sheet has no columns, is
    copied into the '异常数据' directory and a diagnostic line is printed.

    Args:
        file: path of the workbook.
        colnameindex: accepted for backward compatibility; not used.
        by_index: accepted for backward compatibility; not used (all sheets
            are scanned).

    Returns:
        A list of unique phone-number strings in first-seen order; empty
        when the workbook is unreadable or holds no numbers.
    """
    log.debug(file)
    data = open_excel(file)
    if data is None:
        print('%s 数据异常 解析失败 code = -1' % file)
        _quarantine_workbook(file)
        return []

    sheets = data.sheets()
    found = []
    for index, table in enumerate(sheets):
        if table.ncols == 0:
            if len(sheets) == 1:
                # The only sheet is empty: treat the whole file as broken.
                _quarantine_workbook(file)
                print('%s 数据异常 解析失败 code =  %s %s'
                      % (file, index, os.path.splitext(file)))
                break
            # Empty sheet next to other sheets: just skip it.
            continue
        for rownum in range(1, table.nrows):
            for cell in table.row_values(rownum):
                found.extend(find_phone_num(_cell_text(cell)))

    # De-duplicate while keeping the order in which numbers were first seen.
    seen = set()
    unique = []
    for number in found:
        if number not in seen:
            seen.add(number)
            unique.append(number)
    return unique

def execute(dir_path='./expfilter/data'):
    """Scan every ``.xls`` file in *dir_path* and write the phone numbers found.

    The '异常数据' directory under the current working directory is emptied
    and recreated first.  Each workbook is then processed with
    ``executeExcel`` and the collected numbers are written, one per line, to
    '手机号筛选结果.txt' in the current working directory.

    Args:
        dir_path: directory containing the workbooks.  Defaults to the
            location that used to be hard-coded.

    NOTE(review): numbers are only de-duplicated per workbook by
    executeExcel; a number present in several files is written several
    times -- confirm whether that is intended.
    """
    error_dir = os.path.join(os.path.abspath('.'), '异常数据')
    if os.path.exists(error_dir):
        shutil.rmtree(error_dir)
    os.makedirs(error_dir)

    numbers = []
    # Sorted so the output order does not depend on the order in which the
    # operating system happens to list the directory.
    for filename in sorted(os.listdir(dir_path)):
        # Case-insensitive so that e.g. "DATA.XLS" is not skipped.
        if os.path.splitext(filename)[1].lower() == '.xls':
            numbers.extend(executeExcel(os.path.join(dir_path, filename)))

    with open('手机号筛选结果.txt', 'w') as file_obj:
        file_obj.writelines(str(item) + '\n' for item in numbers)